{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Audio Datasets\n\n``torchaudio`` provides easy access to common, publicly accessible\ndatasets. Please refer to the official documentation for the list of\navailable datasets.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# When running this tutorial in Google Colab, install the required packages\n# with the following.\n# !pip install torchaudio\n\nimport torch\nimport torchaudio\n\nprint(torch.__version__)\nprint(torchaudio.__version__)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Preparing data and utility functions (skip this section)\n\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# @title Prepare data and utility functions. {display-mode: \"form\"}\n# @markdown\n# @markdown You do not need to look into this cell.\n# @markdown Just execute once and you are good to go.\n\n# -------------------------------------------------------------------------------\n# Preparation of data and helper functions.\n# -------------------------------------------------------------------------------\nimport multiprocessing\nimport os\n\nimport matplotlib.pyplot as plt\nfrom IPython.display import Audio, display\n\n\n_SAMPLE_DIR = \"_assets\"\nYESNO_DATASET_PATH = os.path.join(_SAMPLE_DIR, \"yes_no\")\nos.makedirs(YESNO_DATASET_PATH, exist_ok=True)\n\n\ndef plot_specgram(waveform, sample_rate, title=\"Spectrogram\", xlim=None):\n    waveform = waveform.numpy()\n\n    num_channels, num_frames = waveform.shape\n\n    figure, axes = plt.subplots(num_channels, 1)\n    if num_channels == 1:\n        axes = [axes]\n    for c in range(num_channels):\n        axes[c].specgram(waveform[c], Fs=sample_rate)\n        if num_channels > 1:\n            axes[c].set_ylabel(f\"Channel {c+1}\")\n        if xlim:\n            axes[c].set_xlim(xlim)\n    figure.suptitle(title)\n    plt.show(block=False)\n\n\ndef play_audio(waveform, sample_rate):\n    waveform = waveform.numpy()\n\n    num_channels, num_frames = waveform.shape\n    if num_channels == 1:\n        display(Audio(waveform[0], rate=sample_rate))\n    elif num_channels == 2:\n        display(Audio((waveform[0], waveform[1]), rate=sample_rate))\n    else:\n        raise ValueError(\"Waveform with more than 2 channels are not supported.\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Here, we show how to use the\n:py:func:`torchaudio.datasets.YESNO` dataset.\n\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "dataset = torchaudio.datasets.YESNO(YESNO_DATASET_PATH, download=True)\n\nfor i in [1, 3, 5]:\n    waveform, sample_rate, label = dataset[i]\n    plot_specgram(waveform, sample_rate, title=f\"Sample {i}: {label}\")\n    play_audio(waveform, sample_rate)"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.10.4"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}